*** CREATING DATASET ******


** This do-file creates the final dataset ("./LoyaltyAccountability_FINAL.dta") from the three separate survey files.

** To do this, we clean/transform the three separate survey datasets in turn, before appending the datasets, and generating additional variables for the final dataset


*************************************************


** UK SURVEY (March 2020) data cleaning and transformation **


* Load the raw UK survey. The clear option discards whatever is in memory so
* the do-file can be run from a session that already holds unsaved data.
use "./UK_Mar20_surveydata.dta", clear



** Generate variable to identify survey **

* survey==1 marks UK rows once the three files are appended.
gen survey=1


** Dependent variable (war2, support for prosecution) - remove missing/don't knows and reverse the DV ***

* war2_new is war2 with codes 5, 8 and 9 blanked out to system missing.
gen war2_new = war2 if !inlist(war2, 5, 8, 9)

* war2_f flips the 1-4 scale (1<->4, 2<->3); anything outside 1-4 stays missing.
gen war2_f = 5 - war2_new if inlist(war2_new, 1, 2, 3, 4)


*** Generate nationalist variable using national pride variable (war1)  ****

* war1_new is war1 with code 5 set to missing.
gen war1_new = war1 if war1 != 5

* nationalist is 1 when war1_new is below 3 and 0 otherwise. A missing
* war1_new does not satisfy "<3", so those respondents are coded 0.
gen nationalist = (war1_new < 3)


**** Generate Brexit Remain/ Leave dummy ****

* 0/1 indicators from pastvote_euref: code 2 -> leave, code 1 -> remain.
* Every other value, including missing, is 0 on both.
gen leave  = (pastvote_euref == 2)
gen remain = (pastvote_euref == 1)


**** Generate conservative vs labour voters from variable on voting in 2019 election (vote2019r) *****

* 0/1 indicators from vote2019r: code 1 -> conservative, code 2 -> labour.
* Every other value, including missing, is 0 on both.
gen conservative = (vote2019r == 1)
gen labour       = (vote2019r == 2)


*** Generate education variable based on university education ***

* education is 1 for profile_education_level codes 16 or 17, 0 for everything
* else (including missing).
gen education = inlist(profile_education_level, 16, 17)


*** Generate sample for comparison between no identity and co-national treatment only i.e. remove Estonian and leadership ***

* samp_main flags respondents in vignette groups 1 and 3; all others are 0.
gen samp_main = inlist(vignettesplit, 1, 3)


*** Generate sample for comparison with co-national treatment only i.e. remove no identity and Estonian ****

* samp2 flags vignette groups above 2. Stata treats missing as larger than any
* number, so a bare "vignettesplit>2" would also flag respondents with no
* vignette assignment; the !missing() guard keeps them at 0.
gen samp2 = (vignettesplit > 2 & !missing(vignettesplit))


**** Generate sample for identity of victim (N.Ireland) ****

* nireland flags respondents in vignette groups 3 and 6; all others are 0.
gen nireland = inlist(vignettesplit, 3, 6)


* Save the cleaned UK file. The name must be "UK_Mar20_surveydata_clean.dta"
* because that is the file the append step opens. The replace option lets the
* do-file be re-run when the cleaned file already exists.
save "./UK_Mar20_surveydata_clean.dta", replace

***************************************************************



*** USA1 SURVEY (October 2020) data cleaning and transforming ***


* Load the raw USA1 survey; clear drops the dataset currently in memory so this
* section can also be run on its own.
use "./USA1_Oct20_surveydata.dta", clear


** Generate variable to identify survey **

* survey==2 marks USA1 rows once the three files are appended.
gen survey=2
 


*** Remove missing/don't knows and reverse DV (war2) as per UK survey ***

* war2_new is war2 with code 5 blanked out to system missing.
gen war2_new = war2 if war2 != 5

* war2_f flips the 1-4 scale (1<->4, 2<->3); anything outside 1-4 stays missing.
gen war2_f = 5 - war2_new if inlist(war2_new, 1, 2, 3, 4)


**** Clean other control variables (replace don't know with missing for ideology) ***

* Overwrites ideo5 in place: code 6 becomes system missing.
replace ideo5 = . if inlist(ideo5, 6)


*** Generate education variable based on university education ***

* education is 1 for educ codes 5 or 6, 0 for everything else (including missing).
gen education = inlist(educ, 5, 6)


*** Generate sample for comparison between no identity and co-national treatment only i.e. remove Estonian and leadership ***

* samp_main flags respondents in vignette groups 1 and 3; all others are 0.
gen samp_main = inlist(vignettesplit, 1, 3)


*** Generate sample for comparison with co-national treatment only i.e. remove no identity and Estonian ****

* samp2 flags vignette groups above 2. Stata treats missing as larger than any
* number, so a bare "vignettesplit>2" would also flag respondents with no
* vignette assignment; the !missing() guard keeps them at 0.
gen samp2 = (vignettesplit > 2 & !missing(vignettesplit))



*** Generate nationalist variable from national pride variable (war1) ****

* war1_new is war1 with code 5 set to missing.
gen war1_new = war1 if war1 != 5

* nationalist is 1 when war1_new is below 3 and 0 otherwise. A missing
* war1_new does not satisfy "<3", so those respondents are coded 0.
gen nationalist = (war1_new < 3)



**** Generate Trump  and Clinton voters variables based on 2016 US election variable (presvote16postx) ****

* 0/1 indicators from presvote16postx: code 2 -> trump, code 1 -> clinton.
* Every other value, including missing, is 0 on both.
gen trump   = (presvote16postx == 2)
gen clinton = (presvote16postx == 1)



**** Generate conservatives and liberals variables from ideology variable ****

* 0/1 indicators from ideo5: codes 4-5 -> conservative, codes 1-2 -> liberal.
* Code 3 and missing values are 0 on both.
gen conservative = inlist(ideo5, 4, 5)
gen liberal      = inlist(ideo5, 1, 2)


*** Generate sample for sexual abuse treatment analysis ***

* sexualabuse flags respondents in vignette groups 3 and 6; all others are 0.
gen sexualabuse = inlist(vignettesplit, 3, 6)


* Save the cleaned USA1 file; replace lets the do-file be re-run when the
* cleaned file already exists (plain save would stop with an error).
save "./USA1_Oct20_surveydata_clean.dta", replace


****************************************************************************



** USA2 SURVEY (APRIL 2021) **

* Load the raw USA2 survey; clear drops the dataset currently in memory so this
* section can also be run on its own.
use "./USA2_Apr21_surveydata.dta", clear

* survey==3 marks USA2 rows once the three files are appended.
gen survey=3



**** Remove missing/don't knows, generate single DV, and reverse DV (war2) ****

* Combine the two item versions into one war2: take war2b wherever it is not
* system missing, otherwise fall back to war2a.
gen war2 = cond(war2b != ., war2b, war2a)

* Code 5 is set to missing.
replace war2 = . if war2 == 5

* war2_f flips the 1-4 scale (1<->4, 2<->3); anything outside 1-4 stays missing.
gen war2_f = 5 - war2 if inlist(war2, 1, 2, 3, 4)



**** Clean other control variables (only ideology replace don't know with missing) ***

* Overwrites ideo5 in place: code 6 becomes system missing.
replace ideo5 = . if inlist(ideo5, 6)

* education is 1 for educ codes 5 or 6, 0 for everything else (including missing).
gen education = inlist(educ, 5, 6)


**** Generate nationalism variable *****

* war1_new is war1 with code 5 set to missing.
gen war1_new = war1 if war1 != 5

* nationalist is 1 when war1_new is below 3 and 0 otherwise. A missing
* war1_new does not satisfy "<3", so those respondents are coded 0.
gen nationalist = (war1_new < 3)


*** Create other political preference variables ****


* 0/1 indicators from presvote20post: code 2 -> trump, code 1 -> biden.
* Every other value, including missing, is 0 on both.
gen trump = (presvote20post == 2)
gen biden = (presvote20post == 1)



* Ideology dummies built from ideo5.
*
* conservative is defined by an explicit value list (codes 4 and 5, the same
* definition the USA1 section uses) instead of "ideo5>3". Stata orders missing
* values above every number, so "ideo5>3" is also true where ideo5 is missing.
* Because code 6 has already been recoded to missing earlier in this section,
* the old "replace conservative=0 if ideo5==6" line never matched anything and
* those respondents ended up coded as conservative.
gen conservative = inlist(ideo5, 4, 5)

* liberal: ideo5 below 3. Missing values do not satisfy "<3", so they stay 0.
gen liberal = (ideo5 < 3)


* Single-category indicators: 5 -> vcon, 1 -> vlib, 3 -> moderate.
gen vcon = (ideo5 == 5)

gen vlib = (ideo5 == 1)

gen moderate = (ideo5 == 3)


* Party identification dummies from pid3: code 2 -> republican, code 1 -> democrat.
* Both now name pid3 in full. The democrat line previously referred to "pid",
* which only resolves through Stata's variable-name abbreviation and stops with
* an error as soon as more than one variable starts with "pid".
* NOTE(review): assumes no separate variable called exactly "pid" was intended - confirm against the raw file.
gen republican = (pid3 == 2)

gen democrat = (pid3 == 1)


*** Generate sample for comparison with co-national treatment only i.e. remove no identity and Estonian ****

* samp2 flags vignette groups above 2. Stata treats missing as larger than any
* number, so a bare "vignettesplit>2" would also flag respondents with no
* vignette assignment; the !missing() guard keeps them at 0.
gen samp2 = (vignettesplit > 2 & !missing(vignettesplit))


* Save the cleaned USA2 file; replace lets the do-file be re-run when the
* cleaned file already exists (plain save would stop with an error).
save "./USA2_Apr21_surveydata_clean.dta", replace

*******************************************************************************


**** APPEND THE THREE DATASETS (UK, USA1, USA2) *****


* Start from the cleaned UK file and stack the two cleaned US files beneath it.
* clear drops whatever is in memory so this step can be run on its own.
* All three paths use the same "./" prefix.
* NOTE(review): the UK cleaning step must save its output under exactly this
* filename, otherwise this use command stops with "file not found".
use "./UK_Mar20_surveydata_clean.dta", clear

append using "./USA1_Oct20_surveydata_clean.dta"

append using "./USA2_Apr21_surveydata_clean.dta"


**************************************************************************


*** Relabel different treatment/vignette groups to ensure consistency across the final dataset generating a new vignette/treatment group variable ***

* vignettesplit_new starts as a copy of vignettesplit. Every condition below
* tests the ORIGINAL vignettesplit, so the recodes cannot chain into each other.
gen vignettesplit_new = vignettesplit


** Split up Northern Ireland and sexual abuse treatments in UK and USA1

* Group 6 in USA1 (survey 2) becomes 7; group 6 in the UK file keeps code 6.
replace vignettesplit_new = 7 if survey == 2 & vignettesplit == 6


*** Rename vignettes for USA Survey 2 ****

* USA2 (survey 3) mapping, original -> new:
*   2 -> 4,  3 -> 5   (shift by 2)
*   4 -> 8,  5 -> 9   (shift by 4)
* Other USA2 groups keep their original code.
replace vignettesplit_new = vignettesplit + 2 if survey == 3 & inlist(vignettesplit, 2, 3)

replace vignettesplit_new = vignettesplit + 4 if survey == 3 & inlist(vignettesplit, 4, 5)



* Write the combined dataset; replace lets the do-file be re-run when the
* final file already exists (plain save would stop with an error).
save "./LoyaltyAccountability_FINAL.dta", replace



*********************************************************************************
